"""
@Project ：BiShe 
@File    ：load_vector_db.py
@IDE     ：PyCharm 
@Author  ：zhupp
@Date    ：2024/9/28 15:31 
"""
import os
from langchain_community.embeddings import HuggingFaceBgeEmbeddings
from langchain_community.vectorstores import Chroma

# Local filesystem location of the m3e-base embedding model.
model_embedding = "G:/M3E_model/xrunda/m3e-base"
# Passed through as ``model_kwargs`` below; selects the CUDA device.
model_kwargs = dict(device="cuda")

# Shared embedding object, constructed once when this module is imported and
# used as the ``embedding_function`` of the Chroma store in load_vector_db().
hf = HuggingFaceBgeEmbeddings(
    model_name=model_embedding,
    model_kwargs=model_kwargs,
    encode_kwargs=dict(normalize_embeddings=True),
    # English gloss of the instruction literal: "Generate a representation
    # for this sentence for use in retrieving related articles:"
    query_instruction="为这个句子生成表示以用于检索相关文章：",
)


def load_vector_db(persist_directory: "str | os.PathLike[str]") -> "Chroma":
    """Load an existing Chroma vector database from disk.

    Args:
        persist_directory: Path of the directory holding the persisted
            database. Path-like objects are accepted and normalized with
            ``os.fspath`` before use.

    Returns:
        A ``Chroma`` store opened on ``persist_directory`` that uses the
        module-level ``hf`` embedding function.

    Raises:
        FileNotFoundError: If ``persist_directory`` does not exist or is not
            a directory.
    """
    persist_directory = os.fspath(persist_directory)

    # isdir() rather than exists(): a regular file at this path is not a
    # database directory, so it is rejected with the same error instead of
    # being handed on to Chroma.
    if not os.path.isdir(persist_directory):
        raise FileNotFoundError(f"Database directory not found: {persist_directory}")

    return Chroma(persist_directory=persist_directory, embedding_function=hf)


